### Configuration
################################################################################
### R Options
nodeInfo = system(paste0("scontrol show node ", Sys.info()["nodename"]), intern = TRUE)
cpus = as.integer(unlist(strsplit(unlist(strsplit(nodeInfo[grep("CfgTRES", nodeInfo)], "cpu="))[2], ","))[1])
memInKB = as.integer(unlist(strsplit(unlist(strsplit(nodeInfo[grep("CfgTRES", nodeInfo)], "mem="))[2], "M,"))[1]) * 1024^2

options(stringsAsFactors=FALSE,
        dplyr.summarise.inform=FALSE, 
        future.globals.maxSize=min(memInKB, 20 * 1024^3),
        mc.cores=min(cpus,1),
        future.fork.enable=TRUE, future.plan="multicore",
        future.rng.onMisuse="ignore")

### Fuctions
# Git directory and files to source must be done first, then all helper functions can be sourced
git_files_to_source = c("functions_analysis.R", 
                        "functions_biomart.R", 
                        "functions_degs.R", 
                        "functions_io.R", 
                        "functions_plotting.R", 
                        "functions_util.R")
git_files_to_source = file.path(param$path_to_git, "R", git_files_to_source)
file_exists = purrr::map_lgl(git_files_to_source, file.exists)
if (any(!file_exists)) stop(paste("The following files could not be found:", paste(git_files_to_source[!file_exists], collapse=", "), ". Please check the git directory at '", param$path_to_git, "'.!"))
invisible(purrr::map(git_files_to_source, source))

### Debugging mode
# 'default_debugging' for default, 'terminal_debugger' for debugging without X11, 'print_traceback' for non-interactive sessions 
param$debugging_mode = "default_debugging"
switch (param$debugging_mode, 
        default_debugging=on_error_default_debugging(), 
        terminal_debugger=on_error_start_terminal_debugger(),
        print_traceback=on_error_just_print_traceback(),
        on_error_default_debugging())
### Rmarkdown configuration
################################################################################

### Create output directories
if (!file.exists(file.path(param$path_out, "figures"))) dir.create(file.path(param$path_out, "figures"), recursive=TRUE, showWarnings=FALSE)
if (!file.exists(file.path(param$path_out, "data"))) dir.create(file.path(param$path_out, "data"), recursive=TRUE, showWarnings=FALSE)


### R Options
options(citation_format="pandoc", 
        knitr.table.format="html",
        kableExtra_view_html=TRUE)


### Knitr default options
knitr::opts_chunk$set(echo=TRUE,                     # output code
                      cache=FALSE,                   # do not cache results
                      message=TRUE,                  # show messages
                      warning=TRUE,                  # show warnings
                      results = "hold",              # show results after running whole chunk
                      tidy=FALSE,                    # do not auto-tidy-up code
                      fig.width=10,                  # default fig width in inches
                      class.source='fold-hide',      # by default collapse code blocks
                      dev=c('png', 'svg', 'tiff'),          # create figures in png, tiff, and svg; the first device (png) will be used for HTML output
                      dev.args=list(png = list(type="cairo"),     # png: use cairo - works on cluster
                                    tiff = list(type="cairo", compression = 'zip')),  # tiff: use cairo - works on cluster, transparent background, compression type "zip" is ‘deflate (Adobe-style)’ 
                      dpi=300,                       # figure resolution
                      fig.retina=2,                 # retina multiplier
                      fig.path=paste0(file.path(param$path_out, "figures"), "/")  # Path for figures in png and pdf format (trailing "/" is needed)
)

### Set output hooks
knitr::knit_hooks$set(message=format_message, warning=format_warning)

### Required libraries
library(magrittr)

### Set the bibliographic environment
# Clear previous bibliography
knitcitations::cleanbib()

# If the DOI publication servers cannot be reached, there will be no citations, knitcitations will not write a references.bib file and pandoc will stop. This makes sure that there is always at least one citation.
bib_ref = knitcitations::read.bibtex(file.path(param$path_to_git,"assets","sc_analysis_references.bib"))
invisible(knitcitations::citep(bib_ref))

### Figure heights
# high figures
fig_large_height = 8
# Single figure, landscape
fig_standard_height = 4
# Two plots alongside (e.g. umaps)
fig_standard2_height = 5
# Three plots alongside (e.g. umaps)
fig_standard3_height = 4
# Four plots alongside (e.g. umaps)
fig_standard4_height = 2.5
# Four plots 2x2 (e.g. umaps)
fig_patchwork4_height = fig_standard2_height * 2
# Six plots 2x3 (e.g. umaps)
fig_patchwork6_height = fig_standard3_height * 2
# Eight plots 4x2 (e.g. umaps)
fig_patchwork8_height = fig_standard4_height * 2
# Twelve plots 4x3 (e.g. umaps)
fig_patchwork12_height = fig_standard4_height * 3
# Sixteen plots 4x4 (e.g. umaps)
fig_patchwork16_height = fig_standard4_height * 4
# Load renv
renv::load(file.path(param$path_to_git,"env/basic"))

# Required libraries
library(liana)
library(ComplexHeatmap)
library(patchwork)
library(magrittr)
library(ggplot2)
library(ggsci)

Introduction

Cell-cell communication (CCC) is a process by which cells react to stimuli during many biological processes. CCC commonly focuses on ligand binding to its corresponding receptors but also includes interactions via extra-cellular matrix proteins and transporters. The stimuli commonly elicit in the receiver cells (the cells receiving the signal) a downstream response such as the induction of canonical pathways and transcription factor binding.

CCC inference methods usually use one of the following approaches: 1) Prediction of CCC interactions alone, commonly referred to as ligand-receptor inference as performed by LIANA 2) Analysis of downstream intracellular activities induced by CCC as conducted by NicheNet

Here, we will use LIANA to infer the ligand-receptor interactions as described in the LIANA tutorial (https://saezlab.github.io/liana/articles/liana_tutorial.html) and the ‘LIANA x Tensor-cell2cell Quickstart’ vignette (https://ccc-protocols.readthedocs.io/en/latest/notebooks/ccc_R/QuickStart.html).

The results of all CCC inference methods are affected by ligand-receptor prior knowledge resources i.e. the used databases (quality and contend, and each resource comes with its own biases). In this regard, LIANA provides a consensus expert-curated resource that combiness different resources (CellPhoneDBv223, CellChat27, ICELLNET30, connectomeDB202025, CellTalkDB31, as well as 10 others).

LIANA performs multiple CCC inference methods using the consensus resource and combines the results. The different CCC scores obtained by the diverse methods are categorized and translated into a “Magnitude” and “Specificity” value for the specific interactions. The “Magnitude” of an interaction is a measure of the strength of the expression of the interaction partners, while the “Specificity” indicates how specific an interaction is to a given pair sender and reciever cell tpye. Generally, both values are complementary. For example, a ligand-receptor interaction with a high magnitude score in a given pair of cell types is likely to also be specific.

We load the dataset as S4 class Seurat objects.



### Read rds object
################################################################################

### Load Seurat S4 objects 
# Test if file is defined
if (is.null(param$data)) {
  message("Dataset is not specified")
} else {
  # Test if file exists
  if (file.exists(param$data)) {
    # Read object
    message(paste0("Load dataset:", param$data))
    sc = base::readRDS(param$data)
    
    # Transfer original params to loaded object
    if ("parameters" %in% list_names(sc@misc[])) {
      # Retrieve previous parameter settings
      orig_param = sc@misc$parameters
      if ("SCT" %in% names(sc@assays)) {
        if ("scale.data" %in% Layers(sc[["SCT"]])) {
          orig_param$norm = "SCT"
        } else {
          orig_param$norm = "RNA"
        }
      } else {
        orig_param$norm = "RNA"
      }
      
      # Keep some parameter settings from object and project defined
      orig_param_keep = orig_param[c("annot_version", "species")]
      basic_param_keep = param[c("path_to_git", "scriptname", "author", "project_id", "data", "path_out", "file_annot", "file_cc_genes")]
      
      # Integrate parameter
      param = modifyList(x = param, val = orig_param)
      param = modifyList(x = param, val = basic_param_keep)
      param = modifyList(x = param, val = param_advset)
      param = modifyList(x = param, val = orig_param_keep)
    }
    
    
    ### Set colors
    # Set sample colors based on orig.ident
    if (!is.null(sc@meta.data[["orig.ident"]])) {
      if (length(unique(sc@meta.data[["orig.ident"]]))!=length(param$col_samples)) {
        message(paste0("No or wrong number of distinct colors for samples provieded. Generate colors using color palette", param$col_palette_samples))
        sample_colours_out = suppressWarnings(SetSampleColours(sc, param$col_palette_samples)) 
        sc = sample_colours_out[[1]]
        param$col_samples = sample_colours_out[[2]]
      }
    }
 
    # Set colors for clusters based on seurat_clusters 
    if (!is.null(sc@meta.data[["seurat_clusters"]])) {
      if (length(unique(sc@meta.data[["seurat_clusters"]]))!=length(param$col_clusters)) {
        message(paste0("No or wrong number of distinct colors for clusters provieded. Generate colors using color palette", param$col_palette_clusters))
        cluster_colours_out = suppressWarnings(SetClusterColours(sc, param$col_palette_clusters)) 
        sc = cluster_colours_out[[1]]
        param$col_clusters = cluster_colours_out[[2]]
      }
    }

    # Set colors for cell types based on annotation 
    if (!is.null(sc@meta.data[["annotation"]])) {
      if (length(unique(sc@meta.data[["annotation"]]))!=length(param$col_annotation)) {
        message(paste0("No or wrong number of distinct colors for cell tpye annotation provieded. Generate colors using color palette", param$col_palette_annotation))
        annotation_colours_out = suppressWarnings(SetAnnotationColours(sc, param$col_palette_annotation)) 
        sc = annotation_colours_out[[1]]
        param$col_annotation = annotation_colours_out[[2]]
      }
    }

  sc
  } else {
  message("Dataset does not exist")
  }
}

× (Message)
Load dataset:/mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/output/Testdata/cluster_analysis/data/sc.rds

### Load reference Seurat S4 objects if specified
# Test if file is defined
if (!is.null(param$refdata)) {
  # Test if file exists
  if (file.exists(param$refdata)) {
   # Read object
    message(paste0("Load dataset:", param$refdata)) 
    scR = base::readRDS(param$refdata)
    
    # Transfer original params to loaded object
    if ("parameters" %in% list_names(scR@misc[])) {
      orig_paramR = scR@misc$parameters
      
      if (!is.null(orig_paramR$col_samples)) {
        param$col_samples_ref = orig_paramR$col_samples
      }
      if (!is.null(orig_paramR$col_clusters)) {
        param$col_clusters_ref = orig_paramR$col_clusters
      }
      if (!is.null(orig_paramR$col_annotation)) {
        param$col_annotation_ref = orig_paramR$col_annotation
      }
      param = modifyList(x = param, val = param_advset)
    }
    
    ### Set colors
    # Set sample colors based on orig.ident
    if (!is.null(scR@meta.data[["orig.ident"]])) {
      if (length(unique(scR@meta.data[["orig.ident"]]))!=length(param$col_samples_ref)) {
        message(paste0("No or wrong number of distinct colors for samples provieded. Generate colors using color palette", param$col_palette_samples))
        sample_colours_out = suppressWarnings(SetSampleColours(scR, param$col_palette_samples)) 
        scR = sample_colours_out[[1]]
        param$col_samples_ref = sample_colours_out[[2]]
      }
    }
    
    # Set colors for clusters based on seurat_clusters 
    if (!is.null(scR@meta.data[["seurat_clusters"]])) {
      if (length(unique(scR@meta.data[["seurat_clusters"]]))!=length(param$col_clusters_ref)) {
        message(paste0("No or wrong number of distinct colors for clusters provieded. Generate colors using color palette", param$col_palette_clusters))
        cluster_colours_out = suppressWarnings(SetClusterColours(scR, param$col_palette_clusters)) 
        scR = cluster_colours_out[[1]]
        param$col_clusters_ref = cluster_colours_out[[2]]
      }
    }
    
    # Set colors for cell types based on annotation 
    if (!is.null(scR@meta.data[["annotation"]])) {
      if (length(unique(scR@meta.data[["annotation"]]))!=length(param$col_annotation_ref)) {
        message(paste0("No or wrong number of distinct colors for cell tpye annotation provieded. Generate colors using color palette", param$col_palette_annotation))
        annotation_colours_out = suppressWarnings(SetAnnotationColours(scR, param$col_palette_annotation)) 
        scR = annotation_colours_out[[1]]
        param$col_annotation_ref = annotation_colours_out[[2]]
      }
    }
    
  scR
  } else {
  message("Reference dataset does not exist")
  }
}  
## An object of class Seurat 
## 6439 features across 1078 samples within 1 assay 
## Active assay: RNA (6439 features, 3000 variable features)
##  3 layers present: data, counts, scale.data
##  2 dimensional reductions calculated: pca, umap


Deciphering Cell-Cell Communication

LIANA calculates an aggregate_rank across the used methods and generates a probability distribution for ligand-receptors that are ranked consistently better than expected under a null hypothesis. By default, the aggregate_rank in ascending order and can be interpreted similar to a p-value. Alongside we provide the aggregated “Magnitude” (named magnitude_rank) and “Specificity” (specificity_rank) of each interaction.

# Change omnipathR log level (ERROR or OFF)
OmnipathR::omnipath_set_loglevel(logger::ERROR, target = 'console')

liana_res = liana_wrap(sce = sc, idents_col = "annotation", assay = "RNA", assay.type = 'logcounts', method = param$liana_methods, resource = c('Consensus'), expr_prop = 0.1, permutation = list(nperms = 500), return_all = FALSE, verbose = FALSE)
# aggregate_rank
liana_agg = liana_aggregate(liana_res, verbose = FALSE)
write.csv(liana_agg, file.path(param$path_out, "data", "liana_aggregate_rank.csv"))
# magnitude_rank and specificity_rank
liana_rank_agg = rank_aggregate(liana_res, verbose = FALSE)
write.csv(liana_rank_agg, file.path(param$path_out, "data", "liana_results.csv"))

#  aggregate_rank
knitr::kable(head(liana_agg, 25), align="l", caption="LIANA aggregate_rank, top 25 rows") %>%
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))  %>% 
  kableExtra::scroll_box(width="100%")
# magnitude_rank and specificity_rank
knitr::kable(head(liana_rank_agg, 25), align="l", caption="LIANA magnitude_rank and specificity_rank, top 25 rows") %>%
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))  %>% 
  kableExtra::scroll_box(width="100%")
LIANA aggregate_rank, top 25 rows
source target ligand.complex receptor.complex aggregate_rank mean_rank logfc.logfc_comb logfc.rank natmi.edge_specificity natmi.rank cellphonedb.pvalue cellphonedb.rank
Monocytes B-cells TNFSF13B TNFRSF13C 0.0012215 47.00000 2.7856216 6 0.8761565 3 0 132
Monocytes B-cells TNFSF13B CD40 0.0027377 48.33333 2.3482866 9 0.8179126 4 0 132
Monocytes Monocytes GRN SORT1 0.0033754 49.00000 2.1961055 10 0.7359849 5 0 132
Monocytes Monocytes PSAP LRP1 0.0056819 50.66667 2.7616439 7 0.5199923 13 0 132
Monocytes Monocytes PSAP SORT1 0.0065810 51.33333 2.3709823 8 0.5100402 14 0 132
Monocytes B-cells TNFSF13B TNFRSF13B 0.0065810 49.33333 1.9835360 14 0.9117102 2 0 132
Monocytes Monocytes CD14 TLR6 0.0108211 55.66667 1.9471684 17 0.4567520 18 0 132
Monocytes NK cells IL1B ADRB2 0.0108211 52.33333 1.9252123 18 0.5757635 7 0 132
Monocytes Monocytes GRN TNFRSF1B 0.0120408 52.00000 2.8381942 5 0.4409884 19 0 132
B-cells CD8+ T-cells HLA-DQA1 LAG3 0.0146700 54.00000 1.8050511 21 0.5356698 9 0 132
Monocytes CD8+ T-cells S100A8 CD69 0.0175097 62.66667 3.7367472 1 0.2472976 55 0 132
Monocytes Monocytes NRG1 MS4A4A 0.0175097 81.33333 0.6792135 111 0.9766946 1 0 132
Monocytes Monocytes LGALS9 LRP1 0.0206797 57.66667 1.6619519 25 0.4775093 16 0 132
B-cells CD8+ T-cells HLA-DQB1 LAG3 0.0223371 58.33333 1.6331241 26 0.4735673 17 0 132
Monocytes NK cells IL1B SIGIRR 0.0240560 60.66667 1.7099145 23 0.3574421 27 0 132
Monocytes NK cells SECTM1 CD7 0.0240560 58.00000 1.5863438 27 0.4882453 15 0 132
Monocytes Monocytes ANXA1 FPR1 0.0258361 58.66667 1.9721433 16 0.3275053 28 0 132
Monocytes Monocytes APP APLP2 0.0295788 59.00000 1.9764900 15 0.3151412 30 0 132
Monocytes Monocytes APP LRP1 0.0295788 57.66667 1.5450213 30 0.5254924 11 0 132
Monocytes NK cells S100A8 CD69 0.0349511 64.00000 3.6315054 2 0.2444388 58 0 132
B-cells CD8+ T-cells HLA-DRA LAG3 0.0356451 62.66667 1.4660470 33 0.3762268 23 0 132
Monocytes Monocytes TIMP2 CD44 0.0377868 62.00000 1.8390221 20 0.2981862 34 0 132
B-cells CD8+ T-cells HLA-DRB1 LAG3 0.0377868 62.66667 1.4643086 34 0.3865241 22 0 132
B-cells B-cells ST6GAL1 CD22 0.0399878 63.66667 1.6805757 24 0.2961188 35 0 132
Monocytes B-cells S100A8 CD69 0.0511082 68.66667 3.6156569 3 0.2242757 71 0 132
LIANA magnitude_rank and specificity_rank, top 25 rows
source target ligand.complex receptor.complex magnitude_rank specificity_rank magnitude_mean_rank natmi.prod_weight cellphonedb.lr.mean specificity_mean_rank logfc.logfc_comb natmi.edge_specificity cellphonedb.pvalue
Monocytes CD8+ T-cells S100A8 CD69 0.0000076 0.0175097 1.0 7.326477 3.197897 62.66667 3.7367472 0.2472976 0
Monocytes NK cells S100A8 CD69 0.0000304 0.0349511 2.0 7.241781 3.189256 64.00000 3.6315054 0.2444388 0
Monocytes B-cells S100A8 CD69 0.0000684 0.0511082 3.0 6.644426 3.128312 68.66667 3.6156569 0.2242757 0
Monocytes CD4+ T-cells S100A8 CD69 0.0001216 0.0511082 4.0 5.234948 2.984513 81.33333 3.4042166 0.1767002 0
Monocytes Monocytes GRN TNFRSF1B 0.0004864 0.0120408 6.5 4.482562 2.152250 52.00000 2.8381942 0.4409884 0
Monocytes Monocytes ANXA1 FPR1 0.0007600 0.0258361 8.5 3.813116 2.062626 58.66667 1.9721433 0.3275053 0
CD8+ T-cells CD8+ T-cells CD48 CD2 0.0012843 0.2995712 9.5 3.842218 2.015224 196.66667 0.2451358 0.0978671 0
Monocytes Monocytes PSAP LRP1 0.0012843 0.0056819 9.5 3.264083 2.194912 50.66667 2.7616439 0.5199923 0
Monocytes Monocytes TIMP2 CD44 0.0017099 0.0377868 11.5 3.742651 2.007968 62.00000 1.8390221 0.2981862 0
CD4+ T-cells CD8+ T-cells CD48 CD2 0.0019455 0.2703664 12.5 3.682539 1.963626 183.33333 0.3500985 0.0937999 0
NK cells Monocytes ANXA1 FPR1 0.0027435 0.0511082 14.5 3.440565 1.929412 70.66667 1.3956981 0.2955073 0
NK cells NK cells CLEC2B KLRF1 0.0033515 0.0511082 19.5 2.822475 1.873998 64.00000 2.0815390 0.2628499 0
B-cells CD8+ T-cells CD48 CD2 0.0036782 0.6492640 16.5 3.403725 1.873532 231.00000 -0.0089090 0.0866980 0
Monocytes CD8+ T-cells CD48 CD2 0.0040202 0.3816175 17.5 3.339887 1.852904 208.00000 0.2517856 0.0850720 0
CD8+ T-cells CD4+ T-cells CD48 CD2 0.0043774 0.8811144 20.0 2.920353 1.829596 249.00000 -0.0987362 0.0743858 0
CD8+ T-cells Monocytes ANXA1 FPR1 0.0047498 0.0511082 20.0 3.158117 1.828416 71.33333 1.4093857 0.2712481 0
NK cells CD8+ T-cells CD48 CD2 0.0051374 0.6429605 20.0 3.249439 1.823677 234.33333 -0.0045504 0.0827681 0
CD8+ T-cells NK cells CLEC2B KLRF1 0.0055402 0.0511082 25.5 2.529306 1.819794 69.33333 2.0957862 0.2355478 0
Monocytes Monocytes LGALS9 CD44 0.0059581 0.0534667 24.0 2.780072 1.818917 104.00000 1.3670928 0.1556769 0
CD4+ T-cells CD4+ T-cells CD48 CD2 0.0068397 0.6059944 24.5 2.798986 1.777999 238.66667 0.0062265 0.0712944 0
Monocytes Monocytes ADAM10 CD44 0.0073033 0.1295945 28.0 2.518577 1.767559 125.66667 1.1033269 0.1231780 0
Monocytes NK cells CLEC2B KLRF1 0.0077821 0.0511082 30.0 2.198078 1.758553 76.33333 2.0023648 0.2047014 0
B-cells CD4+ T-cells CD48 CD2 0.0087852 1.0000000 27.5 2.587068 1.687904 283.33333 -0.3527810 0.0658965 0
Monocytes B-cells TNFSF13B TNFRSF13C 0.0093096 0.0012215 26.0 2.838560 1.684821 47.00000 2.7856216 0.8761565 0
Monocytes CD4+ T-cells CD48 CD2 0.0098492 0.8354175 29.5 2.538547 1.667276 256.00000 -0.0920863 0.0646606 0
heatmap_number_celltypes = max(5, length(unique(sc$annotation)) * 0.3 + 0.5)
fig_number_celltypes = length(unique(sc$annotation)) * 3

Interaction frequencies

We plot the frequencies of interactions for each pair of potentially communicating cell types.

First, we filter interactions by aggregate_rank (<= 0.01), as a indicator for the robustly, highly ranked interactions concordant between methods and visualize the number of the inferred interactions between sender and retriever cell types. Here we assume that the number of the interactions is informative of the occurring communication events. However, this is a rather strong assumption affected by the applied filter criteria. Thus, any conclusions need to be confirmed by supporting information such as biological prior knowledge (Dimitrov et al. (2021)).

Frequency Heatmap

Heatmap displaying the interaction frequencies of all cell types.

# First, we filter interactions by aggregate_rank, as a indicator for the robustly, highly ranked interactions concordant between methods. 
# These p-values are already corrected.
liana_agg_filtered = dplyr::filter(liana_agg, aggregate_rank <= param$liana_agg_rank_threshold) 

number_ccinteractions = length(rownames(unique(liana_agg_filtered[,1:2])))
number_total_interactions = length(rownames(liana_agg_filtered))

# Test for sufficient number of interactions
if (number_ccinteractions*5 > number_total_interactions) {
  message(paste0("The liana_agg_rank_threshold ", param$liana_agg_rank_threshold, " is too stringend. Increasing threshold automatically by factor 10."))
  param$liana_agg_rank_threshold = param$liana_agg_rank_threshold * 10
  # Rerun with lower filter threshold
  liana_agg_filtered = dplyr::filter(liana_agg, aggregate_rank <= param$liana_agg_rank_threshold) 
}

× (Message)
The liana_agg_rank_threshold 0.01 is too stringend. Increasing threshold automatically by factor 10.

# Plot frequency heatmap
p = heat_freq(liana_agg_filtered, font_size = 10, pallette = c("white", "navy"))
p

Frequency Chord diagram

Chord diagram displaying the interaction frequencies of all cell types and chosen source and target cell type groups.

p1 = wrap_elements(full= ~ chord_freq(liana_agg_filtered, cex = 0.8, facing = "inside", adj = c(0.5, 0))) + 
    ggtitle("All cell types")

p2 = wrap_elements(full= ~ chord_freq(liana_agg_filtered, source_groups = param$sender, target_groups = param$receiver, cex = 0.8, facing = "inside", adj = c(0.5, 0))) + 
    ggtitle("Selected cell types")

p = p1 + p2
p


Interactions by source and target cells

The dotplot displays the specific ligand-receptor interactions between sender (Monocytes, NK cells) to receiver (NK cells, CD4+ T-cells, CD8+ T-cells, B-cells) cells. “Specificity” refers to how specific this interaction is to a cell type pair relative to the rest of the cell type pairs, while “Magnitude” is a direct measure of the expression alone. 30 interactions with the highest “Magnitude” and “Specificity” are shown.

p = liana_dotplot(liana_rank_agg, source_groups = param$sender, target_groups = param$receiver, ntop = 30, specificity = 'specificity_rank', magnitude = 'magnitude_rank', invert_specificity = TRUE, invert_magnitude = TRUE) +
  theme(text = element_text(size = 10)) +
  theme(title = element_text(size = 12, face = "bold")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1.2, colour = "black", face = "plain", size = 10)) +
  theme(axis.text.y = element_text(size = 10)) +
  theme(axis.title.x.bottom = element_text(vjust = -1.5)) +
  theme(axis.title.y = element_text(vjust = 3.5))

p

Here, for each cell type, we plot the 10 interactions (signals) with the highest “Magnitude” and “Specificity” that the cells receive from each of the other cell types.

p_list = list()
for (i in unique(sc$annotation)) {
  p_list[[i]] = liana_dotplot(liana_rank_agg, target_groups = i, ntop = 10, specificity = 'specificity_rank', magnitude = 'magnitude_rank', invert_specificity = TRUE, invert_magnitude = TRUE) +
  AddStyle(title="", xlab=i, legend_position = "none") +
  theme(text = element_text(size = 10)) +
  theme(title = element_text(size = 12, face = "bold")) +
  theme(axis.text.x.bottom = element_blank()) +
  theme(axis.title.x = element_text(vjust = -1.5)) +
  theme(axis.title.y = element_blank())
}

p = patchwork::wrap_plots(p_list, ncol = 1)
p



Data export

# Add experiment details
Seurat::Misc(sc, "experiment") = list(project_id=param$project_id, date=Sys.Date(), species=gsub("_gene_ensembl", "", param$mart_dataset))

# Add parameter
Seurat::Misc(sc, "parameters") = param

# Add technical output (note: cannot use Misc function here)
sc@misc$technical = data.frame(ScrnaseqSessionInfo(param$path_to_git))
### Export seurat object
saveRDS(sc, file=file.path(param$path_out, "data", "sc.rds"))
### Convert data
################################################################################

### Export to Loupe Cell Browser
if (all(param$path_data$type == "10x")) { 
  
  # Export reductions (umap, pca, others)
  for(r in Seurat::Reductions(sc)) {
    write.table(Seurat::Embeddings(sc, reduction=r)[,1:2] %>% as.data.frame() %>% tibble::rownames_to_column(var="Barcode"),
                file=file.path(param$path_out, "data", paste0("Loupe_projection_", r, ".csv")), col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
  }
  
  # Export categorical metadata
  loupe_meta = sc@meta.data
  idx_keep = sapply(1:ncol(loupe_meta), function(x) !is.numeric(loupe_meta[,x]))
  write.table(x=loupe_meta[, idx_keep] %>% tibble::rownames_to_column(var="barcode"), 
              file=file.path(param$path_out, "data", "Loupe_metadata.csv"), col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
}



### Export as andata object (h5ad file)
# Convert Seurat v5 single cell object to anndata object
# Does not work for SCT at the moment
# However, it would contain counts and data layer
#scCustomize::as.anndata(x = sc, file_path = file.path(param$path_out, "data"), file_name = "sc_anndata.h5ad", assay="RNA", 
#                        main_layer = "data", other_layers = "counts", transer_dimreduc = TRUE, verbose = FALSE)

# scesy only worked for v3 and v4 objects
# Convert to V3/4/Assay structure
sc_v3 = scCustomize::Convert_Assay(seurat_object = sc, convert_to = "V3", assay = "RNA")

# Convert Seurat v3 single cell object to anndata object
adata = sceasy::convertFormat(sc_v3, from="seurat", to="anndata", outFile=NULL, assay=Seurat::DefaultAssay(sc_v3))
# Write to h5ad file
adata$write(file.path(param$path_out, "data", "sc_anndata.h5ad"), compression="gzip")



### Export count matrix
invisible(ExportSeuratAssayData(sc, 
                                dir=file.path(param$path_out, "data", "counts"), 
                                assays=param$assay_raw, 
                                slot="counts",
                                include_cell_metadata_cols=colnames(sc[[]]),
                                metadata_prefix=paste0(param$project_id, ".")))


### Export metadata
openxlsx::write.xlsx(x=sc[[]] %>% tibble::rownames_to_column(var="Barcode"), file=file.path(param$path_out, "data", "cell_metadata.xlsx"), rowNames=FALSE, colNames=TRUE)
Export seurat object

We export the assay data, cell metadata, clustering and visualization.

Result files:
* sc.rds: Seurat object for import into R
* cell_metadata.xlsx: Cell metadata in excel format
* counts folder: Contains raw count matrix of the entire dataset (sparse matrix)

Export as andata object

We export the assay data, cell metadata, clustering and visualization in andata format.

Result file:
* sc.h5ad: H5AD object

Export to Loupe Cell Browser

If all provided datasets are of type “10x”, we export the UMAP 2D visualization, metadata such as the cell clusters, and lists of differentially expressed genes, so you can open and work with these in the Loupe Cell Browser (https://support.10xgenomics.com/single-cell-gene-expression/software/visualization/latest/what-is-loupe-cell-browser).

Result files are:
* Loupe_projection_(umap|pca|…).csv: Seurat UMAP/PCA/… projections for visualization
* Loupe_metadata.csv: Seurat cell meta data including clusters and cell cycle phases

Projections can be imported in Loupe via “Import Projection”, cell meta data via “Import Categories” and gene sets via “Import Lists”.



Parameters and software versions

The following parameters were used to run the workflow.

Parameters
out = ScrnaseqParamsInfo(params=param)

knitr::kable(out, align="l") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"), full_width=FALSE, position="left")
Name Value
path_to_git /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis
path_to_basic_settings /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/basic_settings.R
path_to_advanced_settings /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/advanced_settings.R
scriptname scripts/ccc_analysis/ccc_analysis.Rmd
author kosankem
assay_raw RNA
downsample_cells_equally FALSE
cell_filter nFeature_RNA:20, NA; nCount_RNA:200, 20000; percent_mt:0, 18; Sample1:nFeature_RNA=c(20, NA), nCount_RNA=c(200, 20000), percent_mt=c(0, 18); Sample2:nFeature_RNA=c(20, NA), nCount_RNA=c(200, 20000), percent_mt=c(0, 18)
feature_filter min_counts:1; min_cells:5; Sample1:min_counts=1, min_cells=5; Sample2:min_counts=1, min_cells=5
samples_min_cells 10
norm RNA
cc_remove FALSE
cc_remove_all FALSE
cc_rescore_after_merge TRUE
integrate_samples method:merge; dimensions:30; k_anchor:10; k_weight:100; reference:; integration_function:CCAIntegration
experimental_groups homogene
pc_n 8
cluster_k 20
umap_k 30
cluster_resolution_test 0.5, 0.7, 0.8
cluster_resolution 0.6
marker_padj 0.05
marker_log2FC 1
marker_pct 0.25
enrichr_site Enrichr
enrichr_padj 0.05
col_palette_samples ggsci::pal_igv
col_palette_clusters ggsci::pal_igv
col_palette_annotation ggsci::pal_ucscgb
col #0086b3
col_bg #D3D3D3
pt_size 0.5
liana_methods logfc, natmi, cellphonedb
liana_agg_rank_threshold 0.1
project_id Testdata
data /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/output/Testdata/cluster_analysis/data/sc.rds
path_data name:Sample1, Sample2; type:10x, 10x; path:/mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/data/counts/sample1/, /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/data/counts/sample2/
species human
sender Monocytes, NK cells
receiver NK cells, CD4+ T-cells, CD8+ T-cells, B-cells
path_out /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/output/Testdata/ccc_analysis
debugging_mode default_debugging
mart_dataset hsapiens_gene_ensembl
mt ^MT-
enrichr_dbs GO_Biological_Process_2023, WikiPathway_2023_Human, Azimuth_2023, CellMarker_2024
annotation_dbs BlueprintEncodeData()
annot_version 98
annot_main ensembl=ensembl_gene_id, symbol=external_gene_name, entrez=entrezgene_accession
mart_attributes ensembl_gene_id, external_gene_name, entrezgene_accession, chromosome_name, start_position, end_position, percentage_gene_gc_content, gene_biotype, strand, description
path_reference /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/references/hsapiens_gene_ensembl/98
reference hsapiens_gene_ensembl.v98.annot.txt
file_annot /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/references/hsapiens_gene_ensembl/98/hsapiens_gene_ensembl.v98.annot.txt
file_cc_genes /mnt/ngsnfs/single_cell_dev/scRNAseq_processing/sc_analysis/references/hsapiens_gene_ensembl/98/cell_cycle_markers.xlsx
col_samples Sample1=#5050FFFF, Sample2=#CE3D32FF
col_clusters 1=#5050FFFF, 2=#CE3D32FF, 3=#749B58FF, 4=#F0E685FF, 5=#466983FF, 6=#BA6338FF, 7=#5DB1DDFF
col_annotation Monocytes=#FF0000FF, CD8+ T-cells=#FF9900FF, CD4+ T-cells=#FFCC00FF, B-cells=#00FF00FF, NK cells=#6699FFFF

This report was generated using the scripts/ccc_analysis/ccc_analysis.Rmd script. Software versions were collected at run time.

Software versions
out = ScrnaseqSessionInfo(param$path_to_git)

knitr::kable(out, align="l") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))
Name Value
Run on: Wed Sep 04 11:16:49 AM 2024
ktrns/scrnaseq 8a210a92c96c41377c5123e641ec837d56bf06ae
Container NA
R R version 4.2.1 (2022-06-23)
Platform x86_64-pc-linux-gnu (64-bit)
Operating system Ubuntu 20.04.6 LTS
Host name hpc-rc11
Host OS #138-Ubuntu SMP Wed Jun 22 15:00:31 UTC 2022 (5.4.0-122-generic)
Packages abind1.4-5, backports1.4.1, basilisk1.17.2, basilisk.utils1.17.2, beachmat2.14.2, beeswarm0.4.0, bibtex0.5.1, Biobase2.58.0, BiocGenerics0.44.0, BiocNeighbors1.16.0, BiocParallel1.32.6, BiocSingular1.14.0, bit4.0.5, bit644.0.5, bitops1.0-7, blob1.2.4, bluster1.8.0, bslib0.6.1, cachem1.0.8, Cairo1.6-2, cellranger1.1.0, checkmate2.3.1, circlize0.4.16, cli3.6.2, clue0.3-65, cluster2.1.3, clustree0.5.1, codetools0.2-18, colorspace2.1-0, ComplexHeatmap2.14.0, cowplot1.1.3, crayon1.5.2, curl5.2.1, data.table1.15.2, DBI1.2.2, DelayedArray0.24.0, DelayedMatrixStats1.20.0, deldir2.0-4, digest0.6.34, dir.expiry1.6.0, doParallel1.0.17, dotCall641.1-1, dplyr1.1.4, dqrng0.3.2, edgeR3.40.2, ellipsis0.3.2, enrichR3.2, evaluate0.23, fansi1.0.6, farver2.1.1, fastDummies1.7.3, fastmap1.1.1, filelock1.0.3, fitdistrplus1.1-11, forcats1.0.0, foreach1.5.2, future1.33.1, future.apply1.11.1, generics0.1.3, GenomeInfoDb1.34.9, GenomeInfoDbData1.2.9, GenomicRanges1.50.2, GetoptLong1.0.5, ggbeeswarm0.7.2, ggforce0.4.2, ggplot23.5.0, ggprism1.0.5, ggraph2.2.1, ggrastr1.0.2, ggrepel0.9.5, ggridges0.5.6, ggsci3.0.1, GlobalOptions0.1.2, globals0.16.2, glue1.7.0, goftest1.2-3, graphlayouts1.1.1, gridExtra2.3, gridGraphics0.5-1, gtable0.3.4, highr0.10, hms1.1.3, htmltools0.5.7, htmlwidgets1.6.4, httpuv1.6.14, httr1.4.7, ica1.0-3, igraph2.0.2, IRanges2.32.0, irlba2.3.5.1, iterators1.0.14, janitor2.2.0, jquerylib0.1.4, jsonlite1.8.8, kableExtra1.4.0, KernSmooth2.23-20, knitcitations1.0.12, knitr1.45, labeling0.4.3, later1.3.2, lattice0.20-45, lazyeval0.2.2, leiden0.4.3.1, liana0.1.14, lifecycle1.0.4, limma3.54.2, listenv0.9.1, lmtest0.9-40, locfit1.5-9.9, logger0.3.0, lubridate1.9.3, magrittr2.0.3, MASS7.3-58, Matrix1.6-5, MatrixGenerics1.10.0, matrixStats1.1.0, memoise2.0.1, metapod1.6.0, mime0.12, miniUI0.1.1.1, munsell0.5.0, nlme3.1-157, OmnipathR3.13.22, openxlsx4.2.5.2, paletteer1.6.0, parallelly1.37.1, patchwork1.2.0, pbapply1.7-2, pillar1.9.0, pkgconfig2.0.3, plotly4.10.4, plyr1.8.9, png0.1-8, polyclip1.10-6, prettyunits1.2.0, progress1.2.3, progressr0.14.0, promises1.2.1, purrr1.0.2, R.methodsS31.8.2, R.oo1.26.0, R.utils2.12.3, R62.5.1, ragg1.2.7, RANN2.6.1, rappdirs0.3.3, RColorBrewer1.1-3, Rcpp1.0.12, RcppAnnoy0.0.22, RcppHNSW0.6.0, RCurl1.98-1.14, readr2.1.5, readxl1.4.3, RefManageR1.4.0, rematch22.1.2, remotes2.4.2.1, renv0.16.0, reshape21.4.4, reticulate1.35.0, rjson0.2.21, rlang1.1.3, rmarkdown2.25, ROCR1.0-11, RSpectra0.16-1, RSQLite2.3.5, rstudioapi0.15.0, rsvd1.0.5, Rtsne0.17, rvest1.0.4, S4Vectors0.36.2, sass0.4.8, ScaledMatrix1.6.0, scales1.3.0, scater1.26.1, scattermore1.2, scCustomize2.1.2, sceasy0.0.7, scran1.26.2, sctransform0.4.1, scuttle1.8.4, sessioninfo1.2.2, Seurat5.0.2, SeuratObject5.0.1, shape1.4.6.1, shiny1.8.0, SingleCellExperiment1.20.1, snakecase0.11.1, sp2.1-3, spam2.10-0, sparseMatrixStats1.10.0, spatstat.data3.0-4, spatstat.explore3.2-6, spatstat.geom3.2-9, spatstat.random3.2-3, spatstat.sparse3.0-3, spatstat.utils3.0-4, statmod1.5.0, stringi1.8.3, stringr1.5.1, SummarizedExperiment1.28.0, survival3.2-13, svglite2.1.3, systemfonts1.0.5, tensor1.5, textshaping0.3.7, tibble3.2.1, tidygraph1.3.1, tidyr1.3.1, tidyselect1.2.0, timechange0.3.0, tweenr2.0.3, tzdb0.4.0, utf81.2.4, uwot0.1.16, vctrs0.6.5, vipor0.4.7, viridis0.6.5, viridisLite0.4.2, withr3.0.0, WriteXLS6.7.0, xfun0.41, XML3.99-0.16.1, xml21.3.6, xtable1.8-4, XVector0.38.0, yaml2.3.8, zip2.3.1, zlibbioc1.44.0, zoo1.8-12


Credits and References

This Workflow was developed as module of the sc_analysis workflow at the Research Core Unit Genomics, Hannover Medical School (Hannover, Germany). Seurat Vignettes were initially used as templates (Hao et al. (2023), Hao et al. (2021)).

# Writes knitcitations references to references.bib file.
knitcitations::write.bibtex(file=file.path(param$path_out, "references.bib"))
Dimitrov, D., Türei, D., Boys, C., Nagai, J.S., Ramirez Flores, R.O., Kim, H., Szalai, B., Costa, I.G., Dugourd, A., Valdeolivas, A., Saez-Rodriguez, J., 2021. Comparison of resources and methods to infer cell-cell communication from single-cell RNA data. bioRxiv. https://doi.org/10.1101/2021.05.21.445160
Hao, Y., Hao, S., Andersen-Nissen, E., III, W.M.M., Zheng, S., Butler, A., Lee, M.J., Wilk, A.J., Darby, C., Zagar, M., Hoffman, P., Stoeckius, M., Papalexi, E., Mimitou, E.P., Jain, J., Srivastava, A., Stuart, T., Fleming, L.B., Yeung, B., Rogers, A.J., McElrath, J.M., Blish, C.A., Gottardo, R., Smibert, P., Satija, R., 2021. Integrated analysis of multimodal single-cell data. Cell. https://doi.org/10.1016/j.cell.2021.04.048
Hao, Y., Stuart, T., Kowalski, M.H., Choudhary, S., Hoffman, P., Hartman, A., Srivastava, A., Molla, G., Madad, S., Fernandez-Granda, C., Satija, R., 2023. Dictionary learning for integrative, multimodal and scalable single-cell analysis. Nature Biotechnology. https://doi.org/10.1038/s41587-023-01767-y